/*
Copyright: Marcelo S. N. Mancini (Hipreme|MrcSnm), 2018 - 2021
License:   [https://creativecommons.org/licenses/by/4.0/|CC BY-4.0 License].
Authors: Marcelo S. N. Mancini

    Copyright Marcelo S. N. Mancini 2018 - 2021.
Distributed under the CC BY-4.0 License.
   (See accompanying file LICENSE.txt or copy at
    https://creativecommons.org/licenses/by/4.0/)
*/
module hip.util.string;
public import hip.util.conv:to;
public import hip.util.to_string_range;

version(WebAssembly) version = UseDRuntimeDecoder;
version(CustomRuntimeTest) version = UseDRuntimeDecoder;
version(PSVita) version = UseDRuntimeDecoder;

/**
*   RefCounted, @nogc string, OutputRange compatible.
*
*   Copies share one malloc'ed buffer and one reference counter. Every mutating
*   operation goes through `updateBorrowed` first, which gives this instance a
*   private buffer whenever the current one is shared or not owned.
*/
struct String
{
    @nogc:
    import core.stdc.string;
    import core.stdc.stdlib;
    import core.int128;
    ///Current content. Only `chars.length` bytes of the buffer are meaningful.
    char[] chars;
    ///Size in bytes of the allocated buffer.
    private size_t _capacity;
    ///Shared reference counter. `null` means this instance owns no buffer.
    private int* countPtr;
    size_t length() const {return chars.length;}

    ///Postblit: a copy shares the buffer, so it only bumps the counter.
    this(this)
    {
        if(countPtr !is null)
            *countPtr = *countPtr + 1;
    }

    /**
    *   Allocates a fresh zero-filled buffer of `length` bytes (128 when 0 is requested)
    *   and a fresh counter set to 1. The content length is reset to 0.
    *   Does not release whatever buffer was previously held.
    */
    private void initialize(size_t length)
    {
        if(length == 0)
            length = 128;
        char* buffer = cast(char*)malloc(length);
        int* counter = cast(int*)malloc(int.sizeof);
        assert(buffer !is null && counter !is null, "Out of memory");
        this.chars = buffer[0..0];
        this.countPtr = counter;
        this._capacity = length;
        this.chars.ptr[0.._capacity] = '\0';
        *countPtr = 1;
    }

    ///Creates a String holding a copy of `str`.
    static auto opCall(string str)
    {
        String s;
        s.initialize(str.length);
        s.chars = s.chars.ptr[0..str.length];
        s.chars[] = str[];
        return s;
    }
    ///Creates a String holding a copy of the null terminated `str`.
    static auto opCall(const(char)* str){return opCall(str[0..strlen(str)]);}
    static auto opCall(String str){return str;}

    private enum isAppendable(T) = is(T == String) || is(T == string) || is(T == immutable(char)*) || is(T == char);

    /**
    *   Builds a String by concatenating the textual form of every argument.
    *   Appendable types are appended directly, everything else goes through
    *   `toStringRange` or its own `toString`.
    */
    static auto opCall(Args...)(Args args)
    {
        import hip.util.conv:toStringRange;
        String s;
        s.initialize(128);
        static foreach(a; args)
        {
            static if(isAppendable!(typeof(a)) )
                s~= a;
            else static if(is(typeof(a) == struct) || __traits(compiles, toStringRange(s, a)))
            {
                toStringRange(s, a);
            }
            else static if(__traits(hasMember, a, "toString"))
                s~= a.toString;
            else static assert(false, "No conversion found");
        }
        return s;
    }

    alias _opApplyFn = int delegate(char c) @nogc;
    /**
    *   foreach support. Iteration stops as soon as the delegate returns non zero
    *   (which is how `break`/`return` inside a foreach body is signalled).
    */
    int opApply(scope _opApplyFn dg)
    {
        int result = 0;
        //Keep going while the body did NOT ask to stop.
        for(size_t i = 0; i < length && result == 0; i++)
            result = dg(chars[i]);
        return result;
    }

    /**
    *   Makes sure this instance has a private buffer before it is written to.
    *
    *   When the instance owns no buffer (never initialized, or viewing a string
    *   assigned without allocation) or when the buffer is shared with other copies,
    *   a new buffer with room for the current content plus `length` extra bytes is
    *   allocated and the current content is copied into it.
    *
    *   Returns: true when a new buffer was allocated, false when the current one is
    *   already exclusively owned (in which case the caller must check the capacity).
    */
    bool updateBorrowed(size_t length)
    {
        const bool owned = countPtr !is null;
        if(owned && *countPtr == 1)
            return false;

        //Something like when slices share a common array: detach and copy.
        char[] oldChars = chars;
        if(owned)
            *countPtr = *countPtr - 1;
        initialize(length + oldChars.length);
        chars = chars.ptr[0..oldChars.length];
        chars[] = oldChars[];
        return true;
    }

    /**
    *   Append (`~=`). Accepts String, string, char[], immutable(char)*, char and
    *   anything else `String(value)` can convert.
    */
    auto ref opOpAssign(string op, T)(T value)
    if(op == "~")
    {
        String temp;
        char[] chs;
        static if(is(T == String))
            chs = value.chars;
        else static if (is(T == string) || is(T == char[]))
            chs = cast(char[])value;
        else static if(is(T == immutable(char)*))
            chs = value[0..strlen(value)];
        else static if(is(T == char))
        {
            char[1] _chContainer;
            _chContainer[0] = value;
            chs = _chContainer;
        }
        else
        {
            temp = String(value);
            chs = temp.chars;
        }
        const size_t needed = chs.length + this.length;
        //New size is greater than capacity: grow by 1.5x of what is needed
        if(!updateBorrowed(chs.length) && needed >= this._capacity)
            resize(needed + needed / 2);
        memcpy(chars.ptr+length, chs.ptr, chs.length);
        chars = chars.ptr[0..chars.length+chs.length];
        return this;
    }

    /**
    *   Replaces the content with `value`.
    *   When no buffer is owned yet, the string is only viewed (no allocation).
    */
    auto ref opAssign(string value)
    {
        if(countPtr is null)
            chars = cast(char[])value; //Don't allocate memory for the string literal.
        else
        {
            if(!updateBorrowed(value.length) && value.length > _capacity)
                resize(value.length);
            //The content length must follow the assigned value, not the old content.
            chars = chars.ptr[0..value.length];
            chars[] = value[];
        }
        return this;
    }

    auto ref opAssign(immutable(char)* value)
    {
        opAssign(value[0..strlen(value)]);
        return this;
    }

    string opCast() const
    {
        return cast(string)chars[0..length];
    }
    ///Returns a view of the internal buffer, it is not a copy.
    string toString() const {return cast(string)chars;}

    ///Reallocates the buffer to `newSize` bytes keeping the content length.
    pragma(inline, true) private void resize(size_t newSize)
    {
        char* newPtr = cast(char*)realloc(chars.ptr, newSize);
        assert(newPtr !is null, "Out of memory");
        chars = newPtr[0..chars.length];
        _capacity = newSize;
    }
    ///Make this struct OutputRange compatible
    void put(char c)
    {
        const size_t needed = this.length + 1;
        //updateBorrowed guarantees an owned, private buffer with room for `c`
        if(!updateBorrowed(1) && needed >= this._capacity)
            resize(needed + needed / 2);
        chars.ptr[length] = c;
        chars = chars.ptr[0..length+1];
    }
    bool opEquals(R)(const R other) const
    {
        static if(is(R == typeof(null)))
            return chars == null;
        else static if(is(R == string))
            return toString == other;
        else static if(is(R == String))
            return toString == other.toString;
        else static assert(false, "Invalid comparison between String and "~R.stringof);
    }

    /**
    *   This function serves to allocate before put. This will make less allocations occur while iterating
    *   this struct as an OutputRange.
    */
    void preAllocate(uint howMuch)
    {
        if(!updateBorrowed(howMuch) && length + howMuch > _capacity)
            resize(_capacity + howMuch);
    }
    void preAllocate(ulong howMuch){preAllocate(cast(uint)howMuch);}

    ref auto opIndex(size_t index)
    {
        assert(index < length, "Index out of bounds");
        return chars[index];
    }

    ///Drops this reference, the last owner frees the buffer and the counter.
    ~this()
    {
        if(countPtr !is null)
        {
            *countPtr = *countPtr - 1;
            assert(*countPtr >= 0);
            //Do not test `chars != null`: that compares contents, so an empty
            //(but allocated) string would never be released.
            if(*countPtr == 0)
            {
                free(chars.ptr);
                free(countPtr);
            }
            countPtr = null;
            chars = null;
        }
    }

}

/**
*   Accumulates string pieces and concatenates them lazily: appended strings are
*   only stored, and they are merged into a single buffer when `toString` runs.
*/
struct StringBuilder
{
    ///Already merged content. Only the first `builtLength` chars are valid.
    private char[] builtString;
    private uint builtLength;
    ///Pieces waiting to be merged.
    string[] strings;
    ///Number of used entries inside `strings`.
    private uint stringsPtr = 0;

    ///Stores `value` to be merged on the next `toString`.
    void append(T)(T value)
    {
        if(stringsPtr == strings.length)
        {
            if(strings.length == 0x10000) //65K (This will guarantee a reasonable amount of allocations)
                toString();
            else
            {
                //128 is a reasonable start, this way, no really small operation should matter on performance
                strings.length = strings.length == 0 ? 128 : strings.length * 2;
            }
        }
        strings[stringsPtr++] = value;
    }
    /**
    *   Merges every pending piece into the internal buffer and returns it.
    *   The returned string is a view of that buffer, not a copy.
    */
    string toString()
    {
        import core.stdc.string:memcpy;
        if(stringsPtr == 0) return cast(string)builtString[0..builtLength];
        uint count = builtLength;
        uint i = builtLength;
        foreach(s;strings[0..stringsPtr])
            count+= s.length;
        builtString.length = count;

        foreach(s; strings[0..stringsPtr])
        {
            memcpy(builtString.ptr+i, s.ptr, s.length);
            i+= s.length;
        }
        builtLength = count;
        stringsPtr = 0;
        return cast(string)builtString[0..builtLength];
    }
    ///Replaces the whole content (pending pieces included) with a copy of `value`.
    auto ref opAssign(T)(T value) if(is(T == string))
    {
        builtString.length = value.length;
        foreach(i, c; value)
            builtString[i] = c;
        stringsPtr = 0;
        builtLength = cast(typeof(builtLength))value.length;

        return this;
    }
    ///Append (`~=`). Arrays other than string are appended element by element.
    auto ref opOpAssign(string op, T)(T value) if(op == "~")
    {
        import std.traits:isArray;
        static if(isArray!T && !is(T == string))
            foreach(v; value) append(v);
        else
            append(value);
        return this;
    }
    ref auto opIndex(size_t index){return toString()[index];}
    ///Length of the already merged content. Pending pieces are not counted until `toString` runs.
    uint length(){return builtLength;}
    ~this(){strings.length = 0;}

    ///Interface for OutputRange
    alias put = append;
}


///Decodes an UTF-8 string to UTF-32.
pure dstring toUTF32(string encoded)
{
    dstring decoded;
    version(UseDRuntimeDecoder)
    {
        foreach(dchar ch; encoded) decoded~= ch;
    }
    else
    {
        static import std.utf;
        decoded = std.utf.toUTF32(encoded);
    }
    return decoded;
}

/**
*   Returns a copy of `str` where every `what` character was replaced by `replaceWith`
*   (removed when `replaceWith` is empty).
*/
pure TString replaceAll(TChar, TString = TChar[])(TString str, TChar what, TString replaceWith = "")
{
    string ret;
    for(size_t i = 0; i < str.length; i++)
    {
        if(str[i] != what) ret~= str[i];
        else if(replaceWith != "") ret~=replaceWith;
    }
    return ret;
}

/**
*   Returns a copy of `str` where every occurrence of `what` was replaced by `replaceWith`.
*   An empty `what` never matches, so the result is a plain copy.
*/
pure TString replaceAll(TString)(TString str, TString what, TString replaceWith = "")
{
    char[] ret;
    int last;
    int i;
    do
    {
        i = indexOf(str, what, i);
        if(i != -1)
        {
            int copyLength = i - last;
            int currLength = cast(int)ret.length;
            ret.length+= copyLength+replaceWith.length;
            //Copy old content
            ret[currLength..currLength+copyLength] = str[last..i];
            //Copy replace
            ret[currLength+copyLength..$] = replaceWith[];
            //Skip what
            i+= what.length;
            last = i;
        }
    } while(i != -1);

    //Copy whatever comes after the last occurrence
    int copyLength = cast(int)(str.length - last);
    int currLength = cast(int)ret.length;
    ret.length+= copyLength;
    ret[currLength..$] = str[last..$];

    return cast(TString)ret;
}

/**
*   Index of the first occurrence of `toFind` inside `str`, searching from `startIndex`.
*   Returns: the index where the occurrence starts, or -1 when there is none, when
*   `toFind` is empty or when `startIndex` is negative.
*/
pure int indexOf (TString)(inout TString str,inout TString toFind, int startIndex = 0) nothrow @nogc @safe
{
    if(!toFind.length || startIndex < 0 || toFind.length > str.length)
        return -1;
    //Last position where toFind still fits
    const size_t lastStart = str.length - toFind.length;

    for(size_t i = startIndex; i <= lastStart; i++)
    {
        //Every candidate is compared from scratch, so overlapping prefixes
        //(finding "ab" inside "aab") are not skipped.
        size_t matched = 0;
        while(matched < toFind.length && str[i + matched] == toFind[matched])
            matched++;
        if(matched == toFind.length)
            return cast(int)i;
    }
    return -1;
}

///Returns whether `str` begins with `withWhat`. An empty `withWhat` always matches.
pure bool startsWith(TString)(inout TString str, inout TString withWhat) nothrow @nogc @safe
{
    if(withWhat.length > str.length)
        return false;
    int index = 0;
    while(index < withWhat.length && str[index] == withWhat[index])
        index++;
    return index == withWhat.length;
}

/**
*   Same thing as startsWith, but returns the part after the afterWhat
*   (null when `str` does not start with it).
*/
pure string after(TString)(TString str, immutable TString afterWhat) nothrow @nogc @safe
{
    bool has = str.startsWith(afterWhat);
    if(!has)
        return null;
    return str[afterWhat.length..$];
}

/**
*   Returns everything that comes after the first occurrence of `afterWhat`
*   (searched from `startIndex`), or null when it is not found.
*/
pure inout(TString) findAfter(TString)(inout TString str, inout TString afterWhat, int startIndex = 0) nothrow @nogc @safe
{
    int afterWhatIndex = str.indexOf(afterWhat, startIndex);
    if(afterWhatIndex == -1)
        return null;
    return str[afterWhatIndex+afterWhat.length..$];
}

/**
*   Returns the content that is between `left` and `right`:
```d
string test = `string containing a "thing"`;
writeln(test.between(`"`, `"`)); //thing
```
*   Returns null when either delimiter is not found.
*/
pure inout(TString) between(TString)(inout TString str, inout TString left, inout TString right, int start = 0) nothrow @nogc @safe
{
    int leftIndex = str.indexOf(left, start);
    if(leftIndex == -1) return null;
    //The content starts after the WHOLE left delimiter, which may be longer than 1 char
    int contentStart = leftIndex + cast(int)left.length;
    int rightIndex = str.indexOf(right, contentStart);
    if(rightIndex == -1) return null;

    return str[contentStart..rightIndex];
}

///Single character version of indexOf.
pure int indexOf(TChar)(inout TChar[] str, inout TChar ch, int startIndex = 0) nothrow @nogc @trusted
{
    TChar[1] temp = [ch];
    return indexOf(str, cast(TChar[])temp, startIndex);
}


///Returns `str` concatenated `repeatQuant` times.
TString repeat(TString)(TString str, size_t repeatQuant)
{
    TString ret;
    for(size_t i = 0; i < repeatQuant; i++)
        ret~= str;
    return ret;
}

///Counts the non overlapping occurrences of `countWhat` inside `str`.
pure int count(TString)(inout TString str, inout TString countWhat) nothrow @nogc @safe
{
    int ret = 0;
    int index = 0;

    //Navigates using indexOf
    while((index = str.indexOf(countWhat, index)) != -1)
    {
        index+= countWhat.length;
        ret++;
    }
    return ret;
}

alias countUntil = indexOf;

/**
*   Index of the last occurrence of `toFind` that ends at or before `startIndex`.
*   `startIndex == -1` (or anything past the end) means "search from the end of `str`".
*   Returns: the index where the occurrence starts, or -1 when there is none or when
*   `toFind` is empty.
*/
int lastIndexOf(TString)(inout TString str,inout TString toFind, int startIndex = -1) pure nothrow @nogc @safe
{
    const int strLength = cast(int)str.length;
    const int findLength = cast(int)toFind.length;
    if(findLength == 0) return -1; //Empty string case
    if(startIndex == -1 || startIndex >= strLength) startIndex = strLength - 1;

    //`begin` is where the candidate starts, its last char sits at begin+findLength-1 <= startIndex
    for(int begin = startIndex - findLength + 1; begin >= 0; begin--)
    {
        int matched = 0;
        while(matched < findLength && str[begin + matched] == toFind[matched])
            matched++;
        if(matched == findLength)
            return begin;
    }
    return -1;
}
///Single character version of lastIndexOf.
int lastIndexOf(TChar)(TChar[] str, TChar ch, int startIndex = -1) pure nothrow @nogc @trusted
{
    TChar[1] temp = [ch];
    return lastIndexOf(str, cast(TChar[])temp, startIndex);
}

/**
*   Converts `s` to `T`. Returns `defaultValue` when `s` is empty or when the
*   conversion throws (best effort on purpose, the exception is discarded).
*/
T toDefault(T)(string s, T defaultValue = T.init)
{
    if(s == "")
        return defaultValue;
    T v = defaultValue;
    try{v = to!(T)(s);}
    catch(Exception e){}
    return v;
}

/**
*   Views a null terminated C string as a D string (no copy is made).
*   Returns null for a null pointer or for an empty C string.
*/
string fromStringz(const char* cstr) pure nothrow @nogc
{
    import core.stdc.string:strlen;
    if(cstr is null)
        return null;
    size_t len = strlen(cstr);
    return (len) ? cast(string)cstr[0..len] : null;
}

///Returns a pointer to a newly allocated, null terminated copy of `str`.
const(char)* toStringz(string str) pure nothrow
{
    return (str~"\0").ptr;
}
///ASCII only lower casing.
pragma(inline, true) char toLowerCase(char c) pure nothrow @safe @nogc
{
    if(c < 'A' || c > 'Z')
        return c;
    return cast(char)(c + ('a' - 'A'));
}

///ASCII only lower casing, returns a new string.
string toLowerCase(string str)
{
    char[] ret = new char[](str.length);
    for(uint i = 0; i < str.length; i++)
        ret[i] = str[i].toLowerCase;
    return cast(string)ret;
}

///ASCII only upper casing.
pragma(inline, true) char toUpper(char c) pure nothrow @nogc @safe
{
    if(c < 'a' || c > 'z')
        return c;
    return cast(char)(c - ('a' - 'A'));
}

///ASCII only upper casing, returns a new string.
string toUpper(string str) pure nothrow @safe
{
    char[] ret = new char[](str.length);
    for(uint i = 0; i < str.length; i++)
        ret[i] = str[i].toUpper;
    return ret;
}

///Single character separator version of split.
TChar[][] split(TChar)(TChar[] str, TChar separator) pure nothrow
{
    TChar[1] sep = [separator];
    return split(str, cast(TChar[])sep);
}

/**
*   Splits `str` on every occurrence of `separator`.
*   The piece after the last separator is always included (it may be empty), so
*   a string without separators results in `[str]`.
*/
TString[] split(TString)(TString str, TString separator) pure nothrow @safe
{
    TString[] ret;
    int last = 0;
    int index = 0;
    do
    {
        index = str.indexOf(separator, index);
        if(index != -1)
        {
            ret~= str[last..index];
            last = index+= separator.length;
        }
    }
    while(index != -1);
    if(last != index)
        ret~= str[last..$];
    return ret;
}

/**
*   Lazy, non allocating version of split. Empty pieces are skipped.
*   A string without separators yields the whole string once.
*/
auto splitRange(TString, TStrSep)(TString str, TStrSep separator) pure nothrow @safe @nogc
{
    struct SplitRange
    {
        TString strToSplit;
        TStrSep sep;
        TString frontStr;
        //Both become -1 once the last piece was taken
        int lastFound, index;

        bool empty(){return frontStr == null && index == -1 && lastFound == -1;}
        TString front()
        {
            //Lazily finds the first piece and skips empty ones
            if(frontStr == "") popFront();
            return frontStr;
        }
        void popFront()
        {
            if(index == -1 && lastFound == -1)
            {
                frontStr = null;
                return;
            }
            index = indexOf(cast(TString)strToSplit, cast(TString)sep, index);
            //When finding, take the string[lastFound..index]
            if(index != -1)
            {
                frontStr = strToSplit[lastFound..index];
                lastFound = index+= sep.length;
            }
            //No more separators: take the remaining string[lastFound..$].
            //This also covers the case where no separator exists at all.
            else
            {
                frontStr = strToSplit[lastFound..$];
                lastFound = -1;
            }
        }
    }

    return SplitRange(str, separator);
}


/**
*   Returns whether `str` is a decimal number: an optional leading '-', digits and
*   at most one '.'. At least one digit is required, so "", "-" and "." are not numbers.
*/
bool isNumber(TString)(in TString str) pure nothrow @nogc
{
    if(str.length == 0)
        return false;
    bool hasDigit = false;
    bool hasDecimalSeparator = false;
    foreach(i, c; str)
    {
        //Check for negative
        if(i == 0 && c == '-')
            continue;
        //Can only check for '.' once.
        if(!hasDecimalSeparator && c == '.')
            hasDecimalSeparator = true;
        else if(c >= '0' && c <= '9')
            hasDigit = true;
        else
            return false;
    }
    return hasDigit;
}

/**
    This function will get the number at the end of the string. Used when you have numbered items such as frames:
    walk_01, walk_02, etc
```d
"test123".getNumericEnding == "123"
"123abc".getNumericEnding == ""
"123".getNumericEnding == "123"
```
    Note that `isNumeric` also accepts '-', so a trailing "-01" is returned with the sign.
*/
string getNumericEnding(string s)
{
    if(!s)
        return "";
    ptrdiff_t i = cast(ptrdiff_t)s.length - 1;
    while(i >= 0)
    {
        if(!isNumeric(s[i]))
            return s[i+1..$];
        i--;
    }
    return s;
}


pragma(inline, true) bool isUpperCase(TChar)(TChar c) @nogc nothrow pure @safe
{
    return c >= 'A' && c <= 'Z';
}
pragma(inline, true) bool isLowercase(TChar)(TChar c) @nogc nothrow pure @safe
{
    return c >= 'a' && c <= 'z';
}

pragma(inline, true) bool isAlpha(TChar)(TChar c) @nogc nothrow pure @safe
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

pragma(inline, true) bool isEndOfLine(TChar)(TChar c) @nogc nothrow pure @safe
{
    return c == '\n' || c == '\r';
}

///Digits and the minus sign.
pragma(inline, true) bool isNumeric(TChar)(TChar c) @nogc nothrow pure @safe
{
    return (c >= '0' && c <= '9') || (c == '-');
}
pragma(inline, true) bool isWhitespace(TChar)(TChar c) @nogc nothrow pure @safe
{
    return (c == ' ' || c == '\t' || c.isEndOfLine);
}

///Splits a path on both '/' and '\\'. Empty components are kept.
TString[] pathSplliter(TString)(TString str)
{
    TString[] ret;

    TString curr;
    for(uint i = 0; i < str.length; i++)
        if(str[i] == '/' || str[i] == '\\')
        {
            ret~= curr;
            curr = null;
        }
        else
            curr~= str[i];
    ret~= curr;
    return ret;
}


/**
*   Returns the slice of `str` without leading and trailing whitespace.
*   A string made only of whitespace results in an empty slice.
*/
TString trim(TString)(TString str) pure nothrow @safe @nogc
{
    size_t start = 0;
    size_t end = str.length; //Exclusive
    while(start < end && str[start].isWhitespace)
        start++;

    //Never walks past `start`, so start <= end always holds for the slice below
    while(end > start && str[end - 1].isWhitespace)
        end--;

    return str[start..end];
}

///Concatenates `args` putting `separator` between each element.
TString join(TString)(TString[] args, TString separator = "")
{
    if(args.length == 0) return "";
    TString ret = args[0];
    for(size_t i = 1; i < args.length; i++)
        ret~=separator~args[i];
    return ret;
}

unittest
{
    assert(join(["hello", "world"], ", ") == "hello, world");
    assert(split("hello world", " ").length == 2);
    assert(toDefault!int("hello") == 0);
    assert(lastIndexOf("hello, hello", "hello") == 7);
    assert(indexOf("hello, hello", "hello") == 0);
    assert(replaceAll("\nTest\n", '\n') == "Test");

    assert(trim(" \n  \thello there  \n \t") == "hello there");
    assert(between(`string containing a "thing"`, `"`, `"`) == "thing");

    assert("test123".getNumericEnding == "123");
    assert("123abc".getNumericEnding == "");
    assert("123".getNumericEnding == "123");

    //Partial matches must not hide a later (or earlier) real match
    assert(indexOf("aab", "ab") == 1);
    assert(lastIndexOf("abb", "ab") == 0);
    assert(between("a<<x>>b", "<<", ">>") == "x");
    assert(trim("   ") == "");
    assert(!isNumber("") && !isNumber("-") && isNumber("-1.5") && !isNumber("1.2.3"));
}